////////////////////////////////////////////////////////// // Name: ImportFromHTML.cpp // // // // Description: Contains functions for loading // // accessing Netscape Bookmark Files // // // // AUTHOR: Lucas Scharf, Jan 26 - Feb 27, 1998 // // // // REVISIONS: 1. Converted data storage from arrays to // // double-linked lists. // // // // VERSION: 2.00 // ////////////////////////////////////////////////////////// //////////////////////////////////////////////////////// // Compiler Directives // //////////////////////////////////////////////////////// //-- Standard Library -- #include //File stream I/O #include //used for atoi() #include //Used for strncmp //-- Custom Headers -- #include "Constants.h" //Global Constants #include "BookmarkFileStructs.h" //Data types #include "ListFunctions.h" //List Managment Functions //#include "UIFunctions.h" //User Interface functions //////////////////////////////////////////////////////// // Private Data Type Declarations // //////////////////////////////////////////////////////// typedef enum LastAddedType {NONE, FOLDER, BOOKMARK}; //////////////////////////////////////////////////////// // Private Function Prototypes // //////////////////////////////////////////////////////// //-- Generate Bookmarks, Folders, etc. 
-- bool GetBookmark(BookmarkDataStruct& NewBM, const char Tag[], int Parent, ifstream& InputFile, ofstream& ErrorLogFile); bool GetFolder(FolderDataStruct& NewFld, const char Tag[], int CurrentEntryIndex, ifstream& InputFile, ofstream& ErrorLogFile); bool GetTitle(char TitleString[], ifstream& InputFile, ofstream& ErrorLogFile); //-- Fetch & Analyze tags -- bool GetTag(ifstream& InputFile, char TagString[]); int NumericalValueFromTag(const char Tag[], const char Key[]); void StringValueFromTag(const char Tag[], const char Key[], char ReturnValue[]); //-- Handle Errors -- void WriteErrorLogHeader(const char ErrorLogFileName[], ofstream& ErrorLog); void OutputError(const char Tag[], const char ErrorText[], ifstream& InputFile, ofstream& ErrorLog); //-- Navigate & Manipulate Present Working Folder -- int CurrentFolder(const int PWF[]); int ParentFolder(const int PWF[]); void ChangeToFolder(int PWF[], int NewFolder); void ChangeToPreviousFolder(int PWF[]); ////////////////////////////////////////////////////////// // Name: ImportBookmarks // // // // Description: This function loads the bookmark // // information from the HTML bookmark file generated // // by Netscape Navigator 4.0 and earlier. // // // // DESCRIPTION OF ALGORITHM : Load a line, figure out // // what it is, put it into the appropriate sectopm // // of the data structure. // // // // Called By: WebAddressChecker() // // CALLS: // // WriteErrrorLogHeader // // GetTag // // Prompt // // GetBookmarkr // // GetFolder // // CurrentFolder // // ParentFolder // // ChangeToFolder // // ChangeToPreviousFolder // // AddFolder // // AddBookmark // // // // Parameters: // // BookmarkListHeadPtr: Data out - Pointer // // to the first item in the // // BookmarkDataStruct double-linked // // list. 
// // FolderListHeadPtr: Data out - Pointer // // to the first item in the // // FolderDataStruct double-linked list // // InputFileName: Data in - An array that // // contains the relative filename of // // the bookmark file to be loaded. // // ErrorLogFileName: Data in - An array // // that contains the relative filename // // of the error log to be generated. // // Returns: True: iff the file was loaded with no // // unrecoverable errors. // // False: iff an unrecoverable error was // // encountered in the file. // // Revisions: // // 3/26/1998 Lucas Scharf: Comment out WAL // // DisplayStr() calls // ////////////////////////////////////////////////////////// bool ImportBookmarks(BookmarkDataPtr& BookmarkListHeadPtr, FolderDataPtr& FolderListHeadPtr, const char InputFileName[], const char ErrorLogFileName[]){ //--- Variable Declarations --- //-- Files -- ifstream BookmarkFile; //The HTML Bookmark File to be read into BookmarkData ofstream ErrorLogFile; //The error log file //-- Heirarchy Managment --- int PWF[10]={-1}, //Present Working Folder. Analagous to a working directory. //Limites the program to handling folders 9 folders deep or less. 
//(1 position is reserved for a null terminator) FolderCount=0; //Number of folders added //-- History -- LastAddedType LastAdded=NONE; //Enumerated Variable storing the type of the last item loaded into the lists //-- Scratch -- char CurrentTag[StringLength+1]; //The tag currently being examined char ScratchString[StringLength+1]; //Scratch string; used to temporarily hold the title of the bookmark file BookmarkDataStruct ScratchBkM; //Scratch bookmark; used to temporarily hold bookmarks FolderDataStruct ScratchFldr; //Scratch folder; used to temporarily hold folders //-- Open input file -- BookmarkFile.open(InputFileName,ios::nocreate); if (!BookmarkFile){ //DisplayStr("Could not open input file: "); //DisplayStr(InputFileName); //DisplayStr("\n"); return false; }//end if //-- Open Error Log File -- ErrorLogFile.open(ErrorLogFileName); if (!ErrorLogFile){ BookmarkFile.close(); //DisplayStr("Could not open error log: "); //DisplayStr(InputFileName); //DisplayStr("\n"); return false; }//end if //-- Write Error Log Header WriteErrorLogHeader(ErrorLogFileName, ErrorLogFile); //-- Load Bookmark Info -- GetTag(BookmarkFile, CurrentTag); //Priming Read while (BookmarkFile){ //-- Bookmark -- if (!strncmp(CurrentTag, "A", 1)){ if (GetBookmark(ScratchBkM, CurrentTag, 0, BookmarkFile, ErrorLogFile)){ //-- Assign Heirarchy Info -- ScratchBkM.ParentEntryIndex = CurrentFolder(PWF); //Set parent of the bookmark //-- Add bookmark to data structure -- AddBookmark(ScratchBkM, BookmarkListHeadPtr); //Add ScratchBkM to the BookmarkDataStruct list //-- Update History(for loading comment field) -- LastAdded=BOOKMARK; }//End if }//end if-Bookmark //-- Folder -- else if(!strncmp(CurrentTag,"H3", 2)){ if (GetFolder(ScratchFldr, CurrentTag, 0, BookmarkFile, ErrorLogFile)){ //-- Update FolderCount -- FolderCount++; //-- Change working Folder -- ChangeToFolder(PWF, FolderCount); //ChangeToFolder(PWF, BookmarkData.FolderCount+1); //-- Assign Heirarchy Info -- ScratchFldr.ParentEntryIndex = 
ParentFolder(PWF); //Set Parent of current folder ScratchFldr.EntryIndex = CurrentFolder(PWF); //Set Eindex of current Folder //-- Assign Folder to data structure -- AddFolder(ScratchFldr, FolderListHeadPtr); //Add folder to Folder List //-- Update History(for loading comment field) -- LastAdded=FOLDER; }//End if }//End if-Folder //-- End Folder -- else if(!strncmp(CurrentTag,"/DL", 3)){ ChangeToPreviousFolder(PWF); }//End if-Folder //-- Comment -- else if(!strncmp(CurrentTag,"DD", 2)){ if (LastAdded==BOOKMARK){ //-- Kludgy comment implementation -- // depends on elements being added in reverse order; will fix when I get around to improving AddFolder & AddBookmark BookmarkFile.getline(BookmarkListHeadPtr->Comment, StringLength, '\n'); }//End if if (LastAdded==FOLDER){ //-- Kludgy comment implementation -- // depends on elements being added in reverse order; will fix when I get around to improving AddFolder & AddBookmark BookmarkFile.getline(FolderListHeadPtr->Comment, StringLength, '\n'); }//End if }//End if-Comment //-- Title -- else if(!strncmp(CurrentTag,"TITLE", 5)){ if (GetTitle(ScratchString, BookmarkFile, ErrorLogFile)){ //-- Set Properties -- strcpy(ScratchFldr.Name, ScratchString); strcpy(ScratchFldr.Comment,"** ROOT **"); ScratchFldr.AddDate = 0; ScratchFldr.EntryIndex = -1; ScratchFldr.ParentEntryIndex = ROOTPARENTENTRYINDEX; //-- Assign Folder -- AddFolder(ScratchFldr, FolderListHeadPtr); //Add folder to Folder List }//end if }//End if-Title //-- Get next tag -- GetTag(BookmarkFile, CurrentTag); }//end while //-- Close files -- BookmarkFile.close(); ErrorLogFile.close(); //-- Return a value -- return true; //Yet to be implemented }//End ImportBookmarks ////////////////////////////////////////////////////////// // Name: GetBookmark // // // // Description: This function adds a bookmark to the // // BookmarkData structure // // // // Called By: ImportBookmarks // // // // Parameters: NewBM: Data out - The contents of the // // bookmark being read. 
// // Tag: Data in - The bookmark tag // // InputFile: Data in & out - Inpput // // filestream from which the tag was // // read; used to read the name of the // // bookmark // // ErrorLogFile: Data through - Output // // filestrem to which errors will be // // sent // // // // Returns: True: iff the the function was executed // // successfully. // // False: iff the the function was executed// // and an unrecoverable error was // // encountered. // ////////////////////////////////////////////////////////// bool GetBookmark(BookmarkDataStruct& NewBM, const char Tag[], int Parent, ifstream& InputFile, ofstream& ErrorLogFile){ //-- Variable Declarations -- char ScratchString[StringLength+1]; //Scratch String //-- Initialize New Bookmark -- NewBM.URL[0] = null; NewBM.Comment[0] = null; NewBM.AddDate = null; NewBM.LastVisit = null; NewBM.LastModified = null; NewBM.ParentEntryIndex = ROOTENTRYINDEX; //-- Extract URL from tag -- StringValueFromTag(Tag, "HREF", ScratchString); if (ScratchString[0]==null){ OutputError(Tag, "Error in bookmark URL field", InputFile, ErrorLogFile); return false; }//End if strcpy(NewBM.URL, ScratchString); //-- Extract ADD_DATE from tag -- NewBM.AddDate = NumericalValueFromTag(Tag, "ADD_DATE"); if (NewBM.AddDate==null){ OutputError(Tag, "Error in bookmark ADD_DATE field", InputFile, ErrorLogFile); return false; }//End if //-- Extract LAST_VISIT from tag -- NewBM.LastVisit = NumericalValueFromTag(Tag, "LAST_VISIT"); if (NewBM.LastVisit==null){ OutputError(Tag, "Error in bookmark LAST_VISIT field", InputFile, ErrorLogFile); return false; }//End if //-- Extract LAST_MODIFIED from tag -- NewBM.LastModified = NumericalValueFromTag(Tag, "LAST_MODIFIED"); if (NewBM.LastModified==null){ OutputError(Tag, "Error in bookmark LAST_MODIFIED field", InputFile, ErrorLogFile); return false; }//End if //-- Grab bookmark name from between tags InputFile.get(NewBM.Name, StringLength, '<'); //Load name from section between tags if (NewBM.Name[0]==null){ 
OutputError(Tag, "Error in bookmark NAME field", InputFile, ErrorLogFile); return false; }//End if //-- Set parent -- NewBM.ParentEntryIndex = Parent; //-- Return Value return true; }//End GetBookmark ////////////////////////////////////////////////////////// // Name: GetFolder // // // // Description: This function adds a bookmark to the // // BookmarkData structure // // // // Called By: ImportBookmarks // // // // Parameters: InputFile: Data in & out - Input file // // stream from which tag will be read. // // NewFLD: Data out - The folder // // information being returned // // // // Returns: True: iff the the function was executed // // successfully. // // False: iff the the function was executed// // and an unrecoverable error was // // encountered. // ////////////////////////////////////////////////////////// bool GetFolder(FolderDataStruct& NewFld, const char Tag[], int CurrentEntryIndex, ifstream& InputFile, ofstream& ErrorLogFile){ //-- Initialize New Folder -- NewFld.Name[0]=null; NewFld.Comment[0]=null; NewFld.AddDate=0; NewFld.EntryIndex=0; NewFld.ParentEntryIndex=0; //-- Extract ADD_DATE from tag -- NewFld.AddDate = NumericalValueFromTag(Tag, "ADD_DATE"); if (!NewFld.AddDate){ OutputError(Tag, "Error in folder ADD_DATE", InputFile, ErrorLogFile); return false; }//End if //-- Grab bookmark name from between tags InputFile.get(NewFld.Name, StringLength, '<'); //Load name from section between tags if (!NewFld.Name[0]){ OutputError(Tag, "Error in folder NAME", InputFile, ErrorLogFile); return false; }//End if //-- Set parent -- NewFld.ParentEntryIndex = CurrentEntryIndex; //-- Set EntryIndex -- NewFld.EntryIndex = CurrentEntryIndex; //-- Return Value -- return true; }//End GetBookmark ////////////////////////////////////////////////////////// // Name: GetTitle // // // // Description: This function sets the title field in // // BookmarkData. 
// // // // Called By: ImportBookmarks // // // // Parameters: TitleString: Data out - The title // // string being returned to read the // // chars. // // InputFile: Data in & out - The input // // filestream from which to read // // TitleString // // ErrorLogFile: Data through - The output // // filestream where errors go // // // // Returns: True: iff the the function was executed // // with no unrecoverable errors. // // False: iff the the function was executed// // and an unrecoverable error was // // encountered. // ////////////////////////////////////////////////////////// bool GetTitle(char TitleString[], ifstream& InputFile, ofstream& ErrorLogFile){ //-- Get text of TITLE -- InputFile.get(TitleString, StringLength, '<'); //-- Return Value -- //-- Return Value -- //-- If no title -- if (TitleString[0]==null){ OutputError("", "Error in TITLE" , InputFile, ErrorLogFile); return false; }// End notitle //-- if input past end of file -- else if (InputFile.fail()) { OutputError("File", "Unexpected end of file", InputFile, ErrorLogFile); }//End if EOF //-- if it's all good -- return true; }//End AddTitle ////////////////////////////////////////////////////////// // Name: GetTag // // // // Description: This function uses a call to the // // ifstream class to get the text of the tag to be // // examined. // // // // DESCRIPTION OF ALGORITHM: Standard library calls. // // // // Called By: ImportBookmarks // // // // Parameters: InputFile: Data in - Input file stream // // from which to read the chars. // // TagString: Data out - The text between // // two <> being returned. // // Returns: True: iff the the function was executed // // with no unrecoverable errors. // // False: iff the the function was executed// // and an unrecoverable error was // // encountered. 
// ////////////////////////////////////////////////////////// bool GetTag(ifstream& InputFile, char TagString[]){ //-- Get Text -- InputFile.ignore(999,'<'); InputFile.get(TagString, StringLength, '>'); InputFile.ignore(999,'>'); //Remove '>' from input stream //-- Return a value -- return true; //Yet to be implemented }//End GetTag ////////////////////////////////////////////////////////// // Name: NumericalValueFromTag // // // // Description: This function searches through // // a HTML tag to find the contents of the specified // // value. // // // // DESCRIPTION OF ALGORITHM: Search and Seize . // // // // Called By: GetBookmark // // GetFolder // // // // Parameters: Tag: Data in - The tag being seached // // Key: Control - The key for the value // // being saught // // Returns: The numerical value associated with the // // key // ////////////////////////////////////////////////////////// int NumericalValueFromTag(const char Tag[], const char Key[]){ //-- Varaible Declarations char *Pointer; //Pointer used in searching through strings int Index; //Temporary Index used in searching through strings char String[StringLength+1]; //Used for temporarily storing strings befor //-- Find key in tag -- Pointer = strstr(Tag, Key); //Find key in tag //-- If key found, deal with it -- if (Pointer!=null){ //-- Move to beginning of value -- while (*Pointer!='"' && *Pointer!=null) //if not current char is a quote ur null Pointer++; //Incriment through string Pointer++; //Move past '"' to first char of value //-- Copy chars to temporary string; convert and return -- Index=0; while (*Pointer!='"' && *Pointer!=null){ String[Index] = *Pointer; //Copy current char to inro scratch string Pointer++; //Move to next position in tag Index++; //Move to next position in scratch string }// End while //-- Convert to int & return if (String[0]) return atoi(String); //return value if found else return 0; //return error if not value not exist } else { //-- Handle key-not-found error -- return 
0; //return error if key not found }//End if }//end NumericalValueFromTag ////////////////////////////////////////////////////////// // Name: StringValueFromTag // // // // Description: This function searches through // // a HTML tag to find the contents of the specified // // value. // // // // DESCRIPTION OF ALGORITHM: Search and Seize . // // // // Called By: GetBookmark // // // // Parameters: Tag: Data in - The tag being seached // // Key: Control - The key for the value // // being saught // // ReturnValue: Data out - The string // // value associated with the key // // Returns: Nothing // ////////////////////////////////////////////////////////// void StringValueFromTag(const char Tag[], const char Key[], char ReturnValue[]){ //-- Variable Declarations -- char *Pointer; //Current position in the Tag int Index; //Current position in ReturnValue //-- Find key in tag & move to first char of the value -- Pointer = strstr(Tag, Key); //find key in Tag //-- If key found, deal with it; else, return null string -- if (Pointer!=null){ //-- Move to '"' -- while (*Pointer!='"' && *Pointer!=null) Pointer++; //Increment through strings Pointer++; //Move past '"' to first char of value //-- Copy chars to ReturnValue -- Index=0; //Move to beginning of ReturnValue while (*Pointer!='"' && *Pointer!=null){ ReturnValue[Index] = *Pointer; //Copy current char from Tag to ReturnValue Pointer++; //Move to next position in tag Index++; //Move to next position in URL string }// End while //-- Add null terminator to string -- ReturnValue[Index]=null; } else { ReturnValue[0] = null; //Return null string }//End if }//End function ////////////////////////////////////////////////////////// // Name: WriteErrorLogHeader // // // // Description: This function writes the error log file // // header to the supplied output filestream. // // // // Algorithm: Uses streamio to format and produce // // output. 
// // // // Called By: ImportBookmarks // // // // Calls: None // // // // PARAMETERS: ErrorLog: Data in - The output // // filestream into which the header will be inserted. // // ////////////////////////////////////////////////////////// void WriteErrorLogHeader(const char ErrorLogFileName[], ofstream& ErrorLog){ //-- First line -- ErrorLog << "Bookmark Error Report: " << ErrorLogFileName << endl; //-- Dividing Line -- for(int Counter=0; Counter < 79; Counter++) ErrorLog << '='; ErrorLog << endl; ErrorLog.flush(); } ////////////////////////////////////////////////////////// // Name: OutputError // // // // Description: This function formats and outputs error // // messages. // // // // Algorithm: Uses streamio to format and produce // // output. // // // // Called By: GetTitle // // GetBookmark // // GetFolder // // // // Calls: None // // // // PARAMETERS: Tag: Data in - The tag preceding or in // // which the error was detected. // // ErrorText: Data in - The text of the // // error // // InputFile: Data in & out - Used to grab // // the rest of the line following the // // tag. // // ErrorLog: Data out - The output // // filestream into which the formatted // // errors will be inserted. // ////////////////////////////////////////////////////////// void OutputError(const char Tag[], const char ErrorText[], ifstream& InputFile, ofstream& ErrorLog){ //-- Variable Declarations -- char RestOfLine[StringLength+1]; //-- Get rest of line -- InputFile.getline(RestOfLine, StringLength, '\n'); //-- Output Error -- ErrorLog << '<' << Tag << '>' << RestOfLine << "\t -- " << ErrorText << endl; ErrorLog << endl; }//End OutputError ////////////////////////////////////////////////////////// // Name: CurrentFolder // // // // Description: This function reads the Present Working // // Folder array and returns the EntryIndex of the // // current folder. // // // // Algorithm: Step through array until a zero value is // // found. 
//
// Called By: ImportBookmarks
//
// Calls: None
//
// PARAMETERS: PWF: Data in - An integer array
//                containing the path to the present
//                working folder, ended by a zero entry.
// Returns: The EntryIndex of the present working
//          folder (the last entry before the zero), or
//          0 when the path is empty.
//////////////////////////////////////////////////////////
int CurrentFolder(const int PWF[]){
    //-- Variable Declarations --
    const int EndOfPath = 0;    //Marker after the last path entry (ImportBookmarks zero-fills PWF)
    int Length = 0;             //Number of entries in front of the marker

    //-- Count the entries in the path --
    while (PWF[Length] != EndOfPath)
        Length++;

    //-- Return Value --
    if (Length < 1)
        return EndOfPath;       //Empty path: there is no entry to read in front of PWF[0]
    return PWF[Length-1];
}//End CurrentFolder

//////////////////////////////////////////////////////////
// Name: ParentFolder
//
// Description: This function reads the Present Working
//    Folder array and returns the EntryIndex of the
//    parent of the current folder.
//
// Algorithm: Step through array until a zero value is
//    found, then take the entry two places before it.
//
// Called By: ImportBookmarks
//
// Calls: None
//
// PARAMETERS: PWF: Data in - An integer array
//                containing the path to the present
//                working folder, ended by a zero entry.
// Returns: The EntryIndex of the parent of the present
//          working folder (the second-to-last entry),
//          or 0 when the path holds fewer than two
//          entries.
//////////////////////////////////////////////////////////
int ParentFolder(const int PWF[]){
    //-- Variable Declarations --
    const int EndOfPath = 0;    //Marker after the last path entry
    int Length = 0;             //Number of entries in front of the marker

    //-- Count the entries in the path --
    while (PWF[Length] != EndOfPath)
        Length++;

    //-- Return Value --
    if (Length < 2)
        return EndOfPath;       //No parent entry: do not read in front of PWF[0]
    return PWF[Length-2];
}//End ParentFolder

//////////////////////////////////////////////////////////
// Name: ChangeToFolder
//
// Description: This function reads the Present Working
//    Folder array and inserts the EntryIndex of the
//    folder in the first unused element.
//
// Algorithm: Step through array until a zero value is
//    found. Set values
//
// Called By: ImportBookmarks
//
// Calls: None
//
// PARAMETERS: PWF: Data in & out - An integer array
//                containing the path to the present
//                working folder.
//             NewFolder: Data in - The folder to add
//                to the path. Must be non-zero, because
//                zero is the end-of-path marker; a zero
//                value is ignored.
// Returns: Nothing.
//
// NOTE: the array length is not passed in, so the
//    caller must guarantee two free elements after the
//    current path (the new entry and the new marker).
//////////////////////////////////////////////////////////
void ChangeToFolder(int PWF[], int NewFolder){
    //-- Variable Declarations --
    const int EndOfPath = 0;    //Marker after the last path entry
    int Length = 0;             //Number of entries in front of the marker

    //-- A zero entry cannot be told apart from the marker --
    if (NewFolder == EndOfPath)
        return;

    //-- Find the first unused element --
    while (PWF[Length] != EndOfPath)
        Length++;

    //-- Set folder & end marker --
    PWF[Length] = NewFolder;
    PWF[Length+1] = EndOfPath;
}//End ChangeToFolder

//////////////////////////////////////////////////////////
// Name: ChangeToPreviousFolder
//
// Description: This function reads the Present Working
//    Folder array and sets the last entry to the
//    end-of-path (zero) value.
//
// Algorithm: Step through array until a zero value is
//    found, then overwrite the entry before it.
//
// Called By: ImportBookmarks
//
// Calls: None
//
// PARAMETERS: PWF: Data in & out - An integer array
//                containing the path to the present
//                working folder. An empty path is left
//                unchanged.
// Returns: Nothing.
//////////////////////////////////////////////////////////
void ChangeToPreviousFolder(int PWF[]){
    //-- Variable Declarations --
    const int EndOfPath = 0;    //Marker after the last path entry
    int Length = 0;             //Number of entries in front of the marker

    //-- Count the entries in the path --
    while (PWF[Length] != EndOfPath)
        Length++;

    //-- Drop the last entry --
    // With an empty path there is nothing to drop, and writing to
    // PWF[Length-1] would land in front of the array.
    if (Length > 0)
        PWF[Length-1] = EndOfPath;
}//End ChangeToPreviousFolder