SoFunction
Updated on 2025-03-07

C#: sample code for getting the first image in an HTML text and truncating the content into a summary

Get the first picture

The data we receive is a piece of HTML text, which may contain many pictures. We often need to pick one of them as the title picture, that is, the main picture. In that case, the following method can be used to obtain the first picture.

Sample code

#region Get the image addresses
// Matches <img ...> tags case-insensitively and captures the src value in the
// named group "src", whether it is single-quoted, double-quoted or unquoted.
// Built once and reused, because constructing a Regex on every call is wasteful.
private static readonly Regex ImgSrcRegex = new Regex(
  @"<IMG[^>]+src=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>",
  RegexOptions.IgnoreCase);

/// <summary>
/// Collects the <c>src</c> attribute value of every <c>&lt;img&gt;</c> tag in an HTML text.
/// </summary>
/// <param name="html">The HTML text to scan. <c>null</c> or an empty string yields an empty list.</param>
/// <returns>
/// An <see cref="ArrayList"/> of lower-cased <c>src</c> strings in document order, so
/// element 0 is the first image. The list is empty when the text contains no image.
/// </returns>
public ArrayList getimgurl(string html)
{
  ArrayList resultStr = new ArrayList();

  // Regex.Matches throws on null input; "no text" simply means "no images".
  if (string.IsNullOrEmpty(html))
  {
    return resultStr;
  }

  foreach (Match m in ImgSrcRegex.Matches(html))
  {
    // NOTE(review): the published listing lower-cases the address; kept for
    // compatibility, but it can break URLs whose path is case-sensitive.
    resultStr.Add(m.Groups["src"].Value.ToLowerInvariant());
  }

  return resultStr;
}
#endregion

Note: the method above returns an ArrayList that contains the src of every img tag in the text, so the address of the first image is simply the first element of the collection.

Truncate HTML text

Sometimes the data we get is a piece of HTML text, and we need to cut out the beginning of it to use as a content summary. In that case, the following method can be used.

Sample code

 #region News content summary
// Matches any markup tag so it can be removed when a plain-text summary is requested.
private static readonly Regex HtmlTagRegex = new Regex("<[^>]*>");

/// <summary>
/// Builds a summary of a news text by keeping roughly the first
/// <paramref name="length"/> characters and appending "……" when text was cut.
/// </summary>
/// <param name="content">The source text, possibly HTML. <c>null</c> or empty returns an empty string.</param>
/// <param name="length">Number of text characters to keep. Zero or a negative value returns an empty string.</param>
/// <param name="StripHTML">
/// <c>true</c>: all tags and spaces are removed and the plain text is truncated.
/// <c>false</c>: the markup is walked tag by tag; paragraph tags become <c>&lt;br/&gt;</c>,
/// other tags are copied through, and text outside tags stops being copied once the budget is used.
/// </param>
/// <returns>The summary; the original <paramref name="content"/> when it is already short enough.</returns>
public string GetContentSummary(string content, int length, bool StripHTML)
{
  // A negative length used to reach Substring and throw; treat it like zero.
  if (string.IsNullOrEmpty(content) || length <= 0)
  {
    return "";
  }

  if (StripHTML)
  {
    string text = HtmlTagRegex.Replace(content, "");
    // NOTE(review): the published listing shows two identical space replacements;
    // the first is presumably the full-width space U+3000 lost in extraction - confirm.
    text = text.Replace("\u3000", "").Replace(" ", "");
    if (text.Length <= length)
    {
      return text;
    }
    return text.Substring(0, length) + "……";
  }

  if (content.Length <= length)
  {
    return content;
  }

  int pos = 0;            // current read position in content
  int size = 0;           // characters already charged against the budget
  bool firststop = false; // the "……" marker has been written
  bool notr = false;      // the one closing </tr> allowed after the budget has been written
  bool noli = false;      // the one closing </li> allowed after the budget has been written
  StringBuilder sb = new StringBuilder();

  while (pos < content.Length)
  {
    char cur = content[pos];

    if (cur != '<')
    {
      if (size < length)
      {
        sb.Append(cur);
        size++;
      }
      else if (!firststop)
      {
        sb.Append("……");
        firststop = true;
      }
      pos++;
      continue;
    }

    int close = content.IndexOf('>', pos);
    if (close < 0)
    {
      // Unterminated tag: skip the lone '<' instead of calling Substring
      // with a negative length, which used to throw.
      pos++;
      continue;
    }

    int npos = close + 1;
    string tag = content.Substring(pos, npos - pos);

    // Look at up to three characters after '<'. Clamped because a tag in the
    // last three characters of the text used to throw ArgumentOutOfRangeException.
    int peek = content.Length - pos - 1;
    if (peek > 3)
    {
      peek = 3;
    }
    string next = content.Substring(pos + 1, peek).ToLowerInvariant();
    bool room = size < length;

    if (next.StartsWith("p", System.StringComparison.Ordinal)
        && !next.StartsWith("pre", System.StringComparison.Ordinal))
    {
      // Opening tags starting with "p" (except <pre>) are dropped.
    }
    else if (next.StartsWith("/p", System.StringComparison.Ordinal)
             && !next.StartsWith("/pr", System.StringComparison.Ordinal))
    {
      // The end of a paragraph becomes a line break.
      if (room)
      {
        sb.Append("<br/>");
      }
    }
    else if (next.StartsWith("br", System.StringComparison.Ordinal))
    {
      if (room)
      {
        sb.Append("<br/>");
      }
    }
    else if (next.StartsWith("img", System.StringComparison.Ordinal))
    {
      if (room)
      {
        sb.Append(tag);
        // An image is charged its tag length plus one against the budget.
        // NOTE(review): "+=" is reconstructed from a damaged listing - confirm.
        size += npos - pos + 1;
      }
    }
    else if (next.StartsWith("li", System.StringComparison.Ordinal)
             || next.StartsWith("/li", System.StringComparison.Ordinal))
    {
      if (room)
      {
        sb.Append(tag);
      }
      else if (!noli && next.StartsWith("/li", System.StringComparison.Ordinal))
      {
        // Close the list item that was open when the budget ran out, once.
        sb.Append(tag);
        noli = true;
      }
    }
    else if (next.StartsWith("tr", System.StringComparison.Ordinal)
             || next.StartsWith("/tr", System.StringComparison.Ordinal))
    {
      if (room)
      {
        sb.Append(tag);
      }
      else if (!notr && next.StartsWith("/tr", System.StringComparison.Ordinal))
      {
        // Close the table row that was open when the budget ran out, once.
        sb.Append(tag);
        notr = true;
      }
    }
    else if (next.StartsWith("td", System.StringComparison.Ordinal)
             || next.StartsWith("/td", System.StringComparison.Ordinal))
    {
      // Cell tags keep being copied until the pending row has been closed.
      if (room || !notr)
      {
        sb.Append(tag);
      }
    }
    else
    {
      // Every other tag is always copied.
      sb.Append(tag);
    }

    pos = npos;
  }

  return sb.ToString();
}
#endregion

Summary

The above is all about using C# to obtain the first image in an HTML text and the content summary. I hope that the content of this article will be helpful to everyone's learning or using C#. If you have any questions, you can leave a message to communicate. Thank you for your support.